
library(lfe)
library(dplyr)
library(bife)
library(plm)
library(PanelMatch)
library(ggplot2)
library(purrr)
library(sidrar)
library(tibble)

rais <- read.csv("data/rais_matched.csv")

apoios <- read.csv("rais/rais19_apoios.csv")

rais$aliados <- ifelse(rais$CPF %in% apoios$CPF & rais$year == 2019, 1, 0)

rais$aliados_all <- ifelse(rais$CPF %in% apoios$CPF, 1, 0)

rais$post <- ifelse(rais$year == 2019, 1, 0)

ci <- function(var, alpha = 0.05) {
    mean <- mean(var, na.rm = TRUE)
    n <- sqrt(sum(!is.na(var)))
    se <- sd(var, na.rm = TRUE) / n
    lower <- mean - qt(1 - (alpha / 2), n - 1) * se
    upper <- mean + qt(1 - (alpha / 2), n - 1) * se
    return(c(lower, upper))
}

# remmedia_dif_ocup

remmedia_dif_ocup <- rais %>%
    group_by(aliados_all, year) %>%
    summarise(
        mean = mean(remmedia_dif_ocup, na.rm = TRUE),
        lower = ci(remmedia_dif_ocup)[1],
        upper = ci(remmedia_dif_ocup)[2]
    )

aliados <- remmedia_dif_ocup[remmedia_dif_ocup$aliados_all == 1, ]
nonaliados <- remmedia_dif_ocup[remmedia_dif_ocup$aliados_all == 0, ]

png("plots/remmedia_dif_ocup_diff.png")
plot(
    x = aliados$year,
    y = aliados$mean,
    cex = 1,
    xlim = c(2013, 2020),
    ylim = c(0.9, 1.35),
    frame = FALSE,
    col = "#003f5c",
    bty = "l",
    lwd = 2,
    pch = 16,
    type = "b",
    ylab = "Av. wage difference with occupation (in th. reais)",
    xlab = "Year"
)
lines(
    x = nonaliados$year,
    y = nonaliados$mean,
    cex = 1,
    xlab = "",
    ylab = "",
    col = "#ffa600",
    bty = "l",
    lwd = 2,
    pch = 16,
    type = "b"
)
# add back the X-axis
# add the confidence intervals
arrows(aliados$year, aliados$lower,
    aliados$year, aliados$upper,
    lwd = 2,
    col = "#003f5c",
    code = 3,
    angle = 90,
    length = 0.05
)
arrows(nonaliados$year, nonaliados$lower,
    nonaliados$year, nonaliados$upper,
    lwd = 2,
    col = "#ffa600",
    code = 3,
    angle = 90,
    length = 0.05
)
legend(
    x = "topright",
    legend = c("Aliados", "Non-Aliados"),
    col = c("#003f5c", "#ffa600"),
    pch = c(16, 16),
    bty = "n",
    pt.cex = 1,
    cex = 1.2,
    text.col = "black",
    horiz = F,
    inset = c(0.1, 0.1)
)
dev.off()

# remmedia_dif_educ

remmedia_dif_educ <- rais %>%
    group_by(aliados_all, year) %>%
    summarise(
        mean = mean(remmedia_dif_educ, na.rm = TRUE),
        lower = ci(remmedia_dif_educ)[1],
        upper = ci(remmedia_dif_educ)[2]
    )

aliados <- remmedia_dif_educ[remmedia_dif_educ$aliados_all == 1, ]
nonaliados <- remmedia_dif_educ[remmedia_dif_educ$aliados_all == 0, ]

png("plots/remmedia_dif_educ_diff.png")
plot(
    x = aliados$year,
    y = aliados$mean,
    cex = 1,
    xlim = c(2013, 2020),
    ylim = c(1.4, 1.9),
    frame = FALSE,
    col = "#003f5c",
    bty = "l",
    lwd = 2,
    pch = 16,
    type = "b",
    ylab = "Av. wage difference with educational group (in th. reais)",
    xlab = "Year"
)
lines(
    x = nonaliados$year,
    y = nonaliados$mean,
    cex = 1,
    xlab = "",
    ylab = "",
    col = "#ffa600",
    bty = "l",
    lwd = 2,
    pch = 16,
    type = "b"
)
# add back the X-axis
# add the confidence intervals
arrows(aliados$year, aliados$lower,
    aliados$year, aliados$upper,
    lwd = 2,
    col = "#003f5c",
    code = 3,
    angle = 90,
    length = 0.05
)
arrows(nonaliados$year, nonaliados$lower,
    nonaliados$year, nonaliados$upper,
    lwd = 2,
    col = "#ffa600",
    code = 3,
    angle = 90,
    length = 0.05
)
legend(
    x = "topright",
    legend = c("Aliados", "Non-Aliados"),
    col = c("#003f5c", "#ffa600"),
    pch = c(16, 16),
    bty = "n",
    pt.cex = 1,
    cex = 1.2,
    text.col = "black",
    horiz = F,
    inset = c(0.1, 0.1)
)
dev.off()

# five year income trajectory

remmedia_m <- felm(remmedia ~ aliados_all:post | CPF + year | 0 | CPF, data = rais)

remmedia_dif_ocup_m <- felm(remmedia_dif_ocup ~ aliados_all:post | CPF + year | 0 | CPF, data = rais)

remmedia_dif_educ_m <- felm(remmedia_dif_educ ~ aliados_all:post | CPF + year | 0 | CPF, data = rais)

lines <- list(
    c("Individual effects", "Yes", "Yes", "Yes"),
    c("Year effects", "Yes", "Yes", "Yes")
)

cat(
    stargazer::stargazer(
        remmedia_m,
        remmedia_dif_ocup_m,
        remmedia_dif_educ_m,
        type = "latex",
        dep.var.labels = c("Av. monthly wage (th. rs.)", "Wage diff. occupation", "Wage diff. educ. group"),
        add.lines = lines,
        omit.stat = c("rsq", "adj.rsq", "f", "ser"),
        covariate.labels = c("Aliados * t = 2019"),
        title = "OLS - Robust standard errors.",
        label = "tab:rais_aliados_diff",
        model.numbers = FALSE
    ),
    file = "tables/rais_aliados_diff.tex", sep = "\n"
)

remmedia_m <- felm(remmedia ~ aliados_all * post | 0 | 0 | CPF, data = rais)

remmedia_dif_ocup_m <- felm(remmedia_dif_ocup ~ aliados_all * post | 0 | 0 | CPF, data = rais)

remmedia_dif_educ_m <- felm(remmedia_dif_educ ~ aliados_all * post | 0 | 0 | CPF, data = rais)

remmedia_dif_industry_m <- felm(remmedia_dif_industry ~ aliados_all * post | 0 | 0 | CPF, data = rais)

medianfirm_m <- felm(medianfirm ~ aliados_all * post | 0 | 0 | CPF, data = rais)

cat(
    stargazer::stargazer(
        remmedia_m,
        remmedia_dif_ocup_m,
        remmedia_dif_educ_m,
        remmedia_dif_industry_m,
        medianfirm_m,
        type = "latex",
        dep.var.labels = c("Wage", "Occup. dif.", "Educ. dif.", "Firm-Industry", "Firm median"),
        omit.stat = c("rsq", "adj.rsq", "f", "ser"),
        covariate.labels = c("Aliados", "t = 2019", "Aliados * t = 2019"),
        title = "OLS - Robust standard errors.",
        label = "tab:rais_aliados_post",
        model.numbers = FALSE
    ),
    file = "tables/rais_aliados_post.tex", sep = "\n"
)

# Step 1: Create a lookup table for price levels by state capital

capital_price_levels <- tibble::tibble(
    uf = c("BA", "CE", "DF", "GO", "MG", "PA", "PE", "PR", "RJ", "RS", "SP"),
    municipio_code = c(
        292740, 230440, 530010, 520870, 310620,
        150140, 261160, 410690, 330455, 431490, 355030
    ),
    capital = c(
        "Salvador", "Fortaleza", "Brasília", "Goiânia", "Belo Horizonte",
        "Belém", "Recife", "Curitiba", "Rio de Janeiro", "Porto Alegre", "São Paulo"
    ),
    price_level = c(
        3772.41, 3605.18, 3833.91, 4034.14, 4113.88,
        4032.72, 3880.80, 3816.57, 4145.41, 3685.19, 4011.61
    )
)

# Step 2: Create a lookup of state → capital price level

uf_to_price <- capital_price_levels %>%
    select(uf, price_level)

# Step 3: Create a state code → UF acronym mapping (needed if you have 6-digit IBGE codes)

state_code_to_uf <- c(
    "12" = "AC", "27" = "AL", "13" = "AM", "16" = "AP", "29" = "BA", "23" = "CE",
    "53" = "DF", "32" = "ES", "52" = "GO", "21" = "MA", "31" = "MG", "51" = "MT",
    "50" = "MS", "15" = "PA", "25" = "PB", "26" = "PE", "22" = "PI", "41" = "PR",
    "33" = "RJ", "24" = "RN", "43" = "RS", "11" = "RO", "14" = "RR", "42" = "SC",
    "28" = "SE", "35" = "SP", "17" = "TO"
)

# Step 4: Assign capital price level to all municipalities in those states

rais <- rais %>%
    mutate(
        state_code = substr(as.character(municipio), 1, 2),
        uf = state_code_to_uf[state_code]
    ) %>%
    left_join(uf_to_price, by = "uf")

# Adjust income variables by price levels (national price level in 01-24 was 3946.44)

rais$remmedia_adj <- rais$remmedia * (rais$price_level / 3946.44)
rais$remmedia_dif_ocup_adj <- rais$remmedia_dif_ocup * (rais$price_level / 3946.44)
rais$remmedia_dif_educ_adj <- rais$remmedia_dif_educ * (rais$price_level / 3946.44)


remmedia_m <- felm(remmedia_adj ~ aliados_all:post | CPF + year | 0 | CPF, data = rais)

remmedia_dif_ocup_m <- felm(remmedia_dif_ocup_adj ~ aliados_all:post | CPF + year | 0 | CPF, data = rais)

remmedia_dif_educ_m <- felm(remmedia_dif_educ_adj ~ aliados_all:post | CPF + year | 0 | CPF, data = rais)

lines <- list(
    c("Individual effects", "Yes", "Yes", "Yes"),
    c("Year effects", "Yes", "Yes", "Yes")
)

cat(
    stargazer::stargazer(
        remmedia_m,
        remmedia_dif_ocup_m,
        remmedia_dif_educ_m,
        type = "latex",
        dep.var.labels = c("Av. monthly wage (th. rs.)", "Wage diff. occupation", "Wage diff. educ. group"),
        add.lines = lines,
        omit.stat = c("rsq", "adj.rsq", "f", "ser"),
        covariate.labels = c("Aliados * t = 2019"),
        title = "OLS - Robust standard errors.",
        label = "tab:rais_aliados_inpc_diff",
        model.numbers = FALSE
    ),
    file = "tables/rais_aliados_inpc_diff.tex", sep = "\n"
)